# remove every visible object from the current environment and trigger a
# garbage collection, so the workspace saved at the end of the script holds
# only what is loaded or created from here on

rm(list = ls())
gc()

# load the trimmed aLife superannuation data
# (presumably the source of combined_dataset used below -- confirm)

load(file = "~/aLife_super_data_trimmed.RData")

# loading packages

library(readr)
library(dplyr)
library(tidyr)
library(tidyverse)
library(data.table)

# creating the filtered dataset (records from the year before death onwards)

# Within each person (alife_id_001), sp_lag is the sp_flag of that person's
# previous record. order_by = income_year makes "previous" mean the preceding
# income year explicitly, instead of depending on whatever row order the
# loaded data happens to have.
# The lag is taken before filtering so that the earliest retained row can
# still see the flag from a record that the filter drops.
# Rows where c_death_fy is NA evaluate to NA in the filter and are dropped.
# The result stays grouped by alife_id_001, as before.
filtered_dataset <- combined_dataset %>%
  group_by(alife_id_001) %>%
  mutate(sp_lag = lag(sp_flag, order_by = income_year)) %>%
  filter(income_year >= (c_death_fy - 1))

# calculating the annual superannuation death benefit flow for everyone in
# filtered_dataset and for four subsets: single / couple by the current
# spouse flag (sp_flag) and by the lagged spouse flag (sp_lag)

# Collapse record-level rows to one row per person (alife_id_001): the
# largest sb_mem_bal among the supplied rows, and c_death_fy reduced to a
# single value with mean().
# Presumes c_death_fy is constant within a person -- TODO confirm.
# NOTE(review): max() runs without na.rm, so a person with a missing
# sb_mem_bal in any supplied row gets NA, and the yearly sum() below then
# becomes NA for that year -- confirm the input has no missing balances.
summarise_person_balance <- function(data) {
  data %>%
    group_by(alife_id_001) %>%
    summarise(super_balance = max(sb_mem_bal),
              year_of_death = mean(c_death_fy)) %>%
    ungroup()
}

# NOTE(review): the subsets below are selected row by row before collapsing
# to one row per person. A person whose flag differs across their retained
# rows is therefore counted in both the single and the couple subset, each
# time using only the balances of the matching rows -- confirm intended.

# everyone in filtered_dataset

summarised_dataset <- summarise_person_balance(filtered_dataset)

yearly_super_death_benefits <- summarised_dataset %>%
  group_by(year_of_death) %>%
  summarise(death_benefits = sum(super_balance),
            num_deaths = n())

# rows with sp_flag == 0

single_dataset <- filtered_dataset %>%
  filter(sp_flag == 0) %>%
  summarise_person_balance()

yearly_single_death_benefits <- single_dataset %>%
  group_by(year_of_death) %>%
  summarise(single_death_benefits = sum(super_balance),
            num_deaths_single = n())

# rows with sp_flag == 1

couple_dataset <- filtered_dataset %>%
  filter(sp_flag == 1) %>%
  summarise_person_balance()

yearly_couple_death_benefits <- couple_dataset %>%
  group_by(year_of_death) %>%
  summarise(couple_death_benefits = sum(super_balance),
            num_deaths_couple = n())

# rows with sp_lag == 0

lag_single_dataset <- filtered_dataset %>%
  filter(sp_lag == 0) %>%
  summarise_person_balance()

yearly_lag_single_death_benefits <- lag_single_dataset %>%
  group_by(year_of_death) %>%
  summarise(lag_single_death_benefits = sum(super_balance),
            num_deaths_lag_single = n())

# rows with sp_lag == 1

lag_couple_dataset <- filtered_dataset %>%
  filter(sp_lag == 1) %>%
  summarise_person_balance()

yearly_lag_couple_death_benefits <- lag_couple_dataset %>%
  group_by(year_of_death) %>%
  summarise(lag_couple_death_benefits = sum(super_balance),
            num_deaths_lag_couple = n())

# combining the five yearly tables into one wide table for comparison

# Left-to-right fold of full joins on year_of_death: a year present in any
# of the tables is kept, with NA in the columns of tables that lack it.
yearly_death_benefits <- Reduce(
  function(joined_so_far, next_table) {
    full_join(joined_so_far, next_table, by = "year_of_death")
  },
  list(
    yearly_super_death_benefits,
    yearly_single_death_benefits,
    yearly_couple_death_benefits,
    yearly_lag_single_death_benefits,
    yearly_lag_couple_death_benefits
  )
)

# Average balance per death (yearly total divided by yearly count) for the
# overall table and for each of the four spouse-flag subsets.
yearly_death_benefits <- mutate(
  yearly_death_benefits,
  avg_balance = death_benefits / num_deaths,
  avg_single_balance = single_death_benefits / num_deaths_single,
  avg_couple_balance = couple_death_benefits / num_deaths_couple,
  avg_lag_single_balance = lag_single_death_benefits / num_deaths_lag_single,
  avg_lag_couple_balance = lag_couple_death_benefits / num_deaths_lag_couple
)
  
# Residual ("missing") totals and counts: the overall figure minus the single
# and couple subsets, once for the current and once for the lagged flag.
#
# The table comes from full joins, so a year with no rows in a subset carries
# NA in that subset's total and count. Plain subtraction would turn the whole
# residual into NA for such a year; an absent subset contributes nothing, so
# NA is read as zero here. Assigning 0L keeps each column's own numeric type.
zero_if_na <- function(x) replace(x, is.na(x), 0L)

yearly_death_benefits <- yearly_death_benefits %>%
  mutate(
    missing_death_benefits = death_benefits -
      zero_if_na(single_death_benefits) -
      zero_if_na(couple_death_benefits),
    lag_missing_death_benefits = death_benefits -
      zero_if_na(lag_single_death_benefits) -
      zero_if_na(lag_couple_death_benefits),
    num_deaths_missing = num_deaths -
      zero_if_na(num_deaths_single) -
      zero_if_na(num_deaths_couple),
    num_deaths_lag_missing = num_deaths -
      zero_if_na(num_deaths_lag_single) -
      zero_if_na(num_deaths_lag_couple)
  )

# Average balance per death in the two residual groups, then keep only rows
# with year_of_death of 2010 or later.
# A residual count of zero makes the division return NaN or Inf, not an error.
yearly_death_benefits <- yearly_death_benefits %>%
  mutate(
    avg_missing_balance = missing_death_benefits / num_deaths_missing,
    avg_lag_missing_balance =
      lag_missing_death_benefits / num_deaths_lag_missing
  ) %>%
  filter(year_of_death >= 2010)

# Export the yearly table to the working directory with three writers:
# readr's write_csv(), base R's write.csv() and readr's write_excel_csv().
# NOTE(review): the third file is written by write_excel_csv() but is named
# .xls -- confirm the consumer expects delimited text under that extension.
yearly_death_benefits %>% write_csv("yearly_death_benefits.csv")
yearly_death_benefits %>% write.csv("yearly_death_benefits2.csv")
yearly_death_benefits %>% write_excel_csv("yearly_death_benefits3.xls")

# saving the whole workspace (every object currently in the global
# environment, not only yearly_death_benefits) for later reuse

save.image(file = "~/aLife_death_benefit_flows_trimmed.RData")